library(ggplot2)
library(devtools)
library(phyloseq)
library(picante)
library(tidyr)
library(forcats)
library(dplyr)
library(tibble)
library(cowplot)
library(picante) # Will also include ape and vegan
library(car) # For residual analysis
library(sandwich) # for vcovHC function in post-hoc test
library(MASS) # For studres in plot_residuals function
library(boot) # For cross validation
source("code/Muskegon_functions.R")
source("code/set_colors.R")# Loads a phyloseq object named otu_merged_musk_pruned)
load("data/surface_PAFL_otu_pruned_raw.RData")
# The name of the phyloseq object is:
surface_PAFL_otu_pruned_raw ## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7806 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 7806 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 7806 tips and 7804 internal nodes ]
# Remove doubletons!
surface_PAFL_otu_pruned_rm2 <- prune_taxa(taxa_sums(surface_PAFL_otu_pruned_raw) > 2, surface_PAFL_otu_pruned_raw)
surface_PAFL_otu_pruned_rm2## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2979 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 2979 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2979 tips and 2977 internal nodes ]
# Remove tree for computational efficiency
surface_PAFL_otu_pruned_notree_rm2 <- phyloseq(tax_table(surface_PAFL_otu_pruned_rm2), otu_table(surface_PAFL_otu_pruned_rm2), sample_data(surface_PAFL_otu_pruned_rm2))
# Gather the metadata in a dataframe to play with
metadata <- data.frame(sample_data(surface_PAFL_otu_pruned_notree_rm2)) %>%
mutate(fraction = factor(fraction, levels = c("WholePart","WholeFree")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))scaled_surface_PAFLA_pruned_rm2 <- scale_reads(surface_PAFL_otu_pruned_notree_rm2)
set.seed(777)
# Calculate the SOREN distance
soren_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = TRUE) # Make it presence/absence
p1 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = soren_whole_dist,
color = "fraction",
shape = "lakesite",
title = "Sørensen") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = c(21, 22, 23, 24)) +
theme(legend.position = "bottom")
# Calculate the BRAY-CURTIS distance
bray_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = FALSE) # Make it Abundance weighted
p2 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = bray_whole_dist ,
color = "fraction",
shape = "lakesite",
title = "Bray-Curtis") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = c(21, 22, 23, 24)) +
theme(legend.position = "bottom")
plot_grid(p1, p2, labels = c("A", "B"), align = "h", ncol = 2)######################################################### Fraction ABUNDANCe
frac_abund_wilcox <- wilcox.test(log10(as.numeric(fraction_bac_abund)) ~ fraction, data = metadata)
frac_abund_wilcox##
## Wilcoxon rank sum test
##
## data: log10(as.numeric(fraction_bac_abund)) by fraction
## W = 0, p-value = 1.479e-06
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(as.numeric(fraction_bac_abund)))## # A tibble: 2 × 2
## fraction `mean(as.numeric(fraction_bac_abund))`
## <fctr> <dbl>
## 1 Particle 41168.88
## 2 Free 734522.25
# Make a data frame to draw significance line in boxplot (visually calculated)
dat1 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(6.45,6.5,6.5,6.45)) # WholePart vs WholeFree
poster_a <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
ylab("Log10(Bacterial Cells/mL)") +
##### Particle vs free cell abundances
geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=6.5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(frac_abund_wilcox$p.value, digits = 6))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
totprod_wilcox <- wilcox.test(frac_bacprod ~ fraction, data = metadata)
totprod_wilcox##
## Wilcoxon rank sum test
##
## data: frac_bacprod by fraction
## W = 33, p-value = 0.02418
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(frac_bacprod))## # A tibble: 2 × 2
## fraction `mean(frac_bacprod)`
## <fctr> <dbl>
## 1 Particle 9.958333
## 2 Free 24.058333
# Make a data frame to draw significance line in boxplot (visually calculated)
dat2 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(67,68,68,67)) # WholePart vs WholeFree
poster_b <- ggplot(metadata, aes(y = frac_bacprod, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
ylab("Heterotrophic Production (μgC/L/hr)") +
##### Particle vs free bulk production
geom_path(data = dat2, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=68, fontface = "bold", size = 3.5, color = "gray40",
label= paste("**\np =", round(totprod_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
percellprod_wilcox <- wilcox.test(log10(fracprod_per_cell) ~ fraction, data = filter(metadata, norep_filter_name != "MOTEJ515"))
percellprod_wilcox##
## Wilcoxon rank sum test
##
## data: log10(fracprod_per_cell) by fraction
## W = 125, p-value = 6.656e-05
## alternative hypothesis: true location shift is not equal to 0
filter(metadata, norep_filter_name != "MOTEJ515") %>%
group_by(fraction) %>%
summarize(mean(fracprod_per_cell))## # A tibble: 2 × 2
## fraction `mean(fracprod_per_cell)`
## <fctr> <dbl>
## 1 Particle 4.816116e-07
## 2 Free 3.866798e-08
# Make a data frame to draw significance line in boxplot (visually calculated)
dat3 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(-5.05,-5,-5,-5.05)) # WholePart vs WholeFree
poster_c <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(fracprod_per_cell), x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
ylim(c(-8.5, -5)) +
ylab("log10(production/cell) (μgC/cell/hr)") +
##### Particle vs free per-cell production
geom_path(data = dat3, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=-5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(percellprod_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",
axis.title.x = element_blank())
plot_grid(poster_a, poster_b, poster_c,
labels = c("A", "B", "C"),
ncol = 3)work_df <- metadata %>%
dplyr::select(norep_filter_name, fraction, fraction_bac_abund, frac_bacprod, fracprod_per_cell_noinf) %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1,4), substr(norep_filter_name, 6,9), sep = "")) %>%
dplyr::select(-norep_filter_name)
part_work_df <- work_df %>%
filter(fraction == "Particle") %>%
rename(part_bacabund = fraction_bac_abund,
part_prod = frac_bacprod,
part_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
free_work_df <- work_df %>%
filter(fraction == "Free") %>%
rename(free_bacabund = fraction_bac_abund,
free_prod = frac_bacprod,
free_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
byfrac_df <- part_work_df %>%
left_join(free_work_df, by = "norep_water_name")
summary(lm(log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df, norep_water_name != "MOTE515")))##
## Call:
## lm(formula = log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df,
## norep_water_name != "MOTE515"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52690 -0.08048 0.05903 0.19565 0.35255
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0900 3.2247 0.648 0.533
## log10(free_bacabund) 0.4264 0.5532 0.771 0.461
##
## Residual standard error: 0.3111 on 9 degrees of freedom
## Multiple R-squared: 0.06194, Adjusted R-squared: -0.04229
## F-statistic: 0.5943 on 1 and 9 DF, p-value: 0.4605
plot1 <- ggplot(filter(byfrac_df, norep_water_name != "MOTE515"),
aes(x = log10(free_bacabund), y = log10(part_bacabund))) +
xlab("Free") + ylab("Particle") +
ggtitle("Log10(cells/mL)") +
geom_point(size = 3, shape = 21, fill = "grey")
lm_prod_corr <- lm(part_prod ~ free_prod, data = byfrac_df)
plot2 <- ggplot(byfrac_df, aes(x = free_prod, y = part_prod)) +
xlab("Free") + ylab("Particle") +
ggtitle("Heterotrophic Production \n(μgC/L/hr)") +
geom_point(size = 3, shape = 21, fill = "grey") +
geom_smooth(method = "lm", color = "black") +
annotate("text", x =20, y= 30, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_prod_corr)$coefficients[,4][2]), digits = 3)))
lm_percell_corr <- lm(log10(part_percell_prod) ~ log10(free_percell_prod), data = byfrac_df)
plot3 <- ggplot(byfrac_df,
aes(x = log10(free_percell_prod), y = log10(part_percell_prod))) +
xlab("Free") + ylab("Particle") +
ggtitle("log10(production/cell) \n (μgC/cell/hr)") +
geom_point(size = 3, shape = 21, fill = "grey") +
geom_smooth(method = "lm", color = "black") +
annotate("text", y = -5.8, x=-7.9, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_corr)$coefficients[,4][2]), digits = 3)))
plot_grid(plot1, plot2, plot3,
nrow = 1, ncol = 3,
labels = c("A", "B", "C"),
align = "h")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on '(μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on '(μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
# Set the seed for reproducibility
set.seed(777)
# Calculate the alpha diversity with 100 repsampling events
alpha_calc <- calc_alpha_diversity(physeq = surface_PAFL_otu_pruned_notree_rm2)
# What was the minimum sample size?
min(sample_sums(surface_PAFL_otu_pruned_notree_rm2)) - 1## [1] 6489
# Put it altogether in a dataframe
otu_alphadiv <- calc_mean_alphadiv(physeq = surface_PAFL_otu_pruned_notree_rm2,
richness_df = alpha_calc$Richness,
evenness_df = alpha_calc$Inverse_Simpson,
shannon_df = alpha_calc$Shannon) %>%
mutate(fraction = factor(fraction, levels = c("WholePart","WholeFree")),
lakesite = factor(lakesite, levels = c("MOT", "MDP", "MBR", "MIN")),
measure = factor(measure, levels = c("Richness", "Shannon_Entropy", "Inverse_Simpson", "Simpsons_Evenness")),
fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))##########################################################################
########################### CORRELATIONS #############################
##########################################################################
# RICHNESS vs SHANNON
cor(filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")$mean,
filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")$mean) # YES## [1] 0.9406491
# SHANNON VS INVERSE SIMPSON
cor(filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")$mean,
filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")$mean) # YES## [1] 0.9651242
# INVERSE SIMPSON VS SIMPSONS EVENNESS
cor(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")$mean,
filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")$mean) # YES## [1] 0.9145095
# SIMPSONS EVENNESS VS RICHNESS
cor(filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")$mean,
filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")$mean) # YES## [1] 0.6881205
ggplot(otu_alphadiv, aes(y = mean, x = fraction)) +
ylab("Mean Diversity Value") +
facet_wrap(~measure, scale = "free_y", ncol = 4) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), color = "black") +
geom_boxplot(alpha = 0.5, outlier.shape = NA, color = "black", aes(fill = fraction)) +
scale_fill_manual(values = fraction_colors) +
scale_color_manual(values = fraction_colors) +
theme(legend.position = "none", axis.title.x = element_blank()) #################################### Bulk Measure Production
# Richness
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Richness")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Richness"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.057 -12.017 -5.654 9.256 46.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.501168 9.039579 2.047 0.0528 .
## mean -0.003335 0.018911 -0.176 0.8616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.54 on 22 degrees of freedom
## Multiple R-squared: 0.001412, Adjusted R-squared: -0.04398
## F-statistic: 0.0311 on 1 and 22 DF, p-value: 0.8616
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Richness" & fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1271 -2.8865 -0.4649 3.7537 9.8401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.269531 5.717450 -1.971 0.07701 .
## mean 0.038125 0.009868 3.863 0.00314 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.476 on 10 degrees of freedom
## Multiple R-squared: 0.5988, Adjusted R-squared: 0.5587
## F-statistic: 14.93 on 1 and 10 DF, p-value: 0.003143
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Richness" & fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Richness" & fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.094 -11.112 -2.755 8.611 33.308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.40793 21.62006 0.250 0.808
## mean 0.05511 0.06207 0.888 0.395
##
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared: 0.07306, Adjusted R-squared: -0.01963
## F-statistic: 0.7882 on 1 and 10 DF, p-value: 0.3955
# Shannon
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Shannon_Entropy"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.659 -11.878 -5.666 9.231 46.593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.63373 26.71652 0.623 0.540
## mean 0.08797 6.22969 0.014 0.989
##
## Residual standard error: 15.55 on 22 degrees of freedom
## Multiple R-squared: 9.064e-06, Adjusted R-squared: -0.04545
## F-statistic: 0.0001994 on 1 and 22 DF, p-value: 0.9889
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Shannon_Entropy" & fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9022 -2.9150 -0.5875 1.6713 12.0544
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -40.320 14.007 -2.878 0.01643 *
## mean 11.089 3.068 3.614 0.00473 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.693 on 10 degrees of freedom
## Multiple R-squared: 0.5664, Adjusted R-squared: 0.5231
## F-statistic: 13.06 on 1 and 10 DF, p-value: 0.004734
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Shannon_Entropy" & fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.004 -10.708 -3.738 6.632 37.129
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.37 73.87 -0.181 0.860
## mean 9.40 18.50 0.508 0.622
##
## Residual standard error: 18.15 on 10 degrees of freedom
## Multiple R-squared: 0.02516, Adjusted R-squared: -0.07233
## F-statistic: 0.2581 on 1 and 10 DF, p-value: 0.6225
# Inverse Simpson
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Inverse_Simpson"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.269 -10.298 -4.916 5.866 46.452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.1577 6.0225 2.019 0.0559 .
## mean 0.1629 0.1731 0.941 0.3570
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.25 on 22 degrees of freedom
## Multiple R-squared: 0.03868, Adjusted R-squared: -0.005019
## F-statistic: 0.8851 on 1 and 22 DF, p-value: 0.357
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Inverse_Simpson" & fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5589 -2.1093 -0.1969 0.8752 7.8282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.45199 2.43866 -0.185 0.85666
## mean 0.29344 0.05781 5.076 0.00048 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.571 on 10 degrees of freedom
## Multiple R-squared: 0.7204, Adjusted R-squared: 0.6925
## F-statistic: 25.77 on 1 and 10 DF, p-value: 0.0004804
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Inverse_Simpson" & fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.765 -9.356 -4.445 6.116 36.017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.0985 16.7052 0.664 0.521
## mean 0.5379 0.6598 0.815 0.434
##
## Residual standard error: 17.8 on 10 degrees of freedom
## Multiple R-squared: 0.06233, Adjusted R-squared: -0.03143
## F-statistic: 0.6648 on 1 and 10 DF, p-value: 0.4339
# Simpson's Evenness
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Simpsons_Evenness"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.000 -7.163 -3.815 3.392 45.845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8524 9.2286 -0.092 0.9272
## mean 274.1606 134.4253 2.040 0.0536 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.26 on 22 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.1208
## F-statistic: 4.16 on 1 and 22 DF, p-value: 0.05359
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Simpsons_Evenness" & fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.973 -2.229 -1.086 1.356 12.380
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.829 4.539 -0.844 0.41865
## mean 234.057 71.238 3.286 0.00821 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.995 on 10 degrees of freedom
## Multiple R-squared: 0.5191, Adjusted R-squared: 0.471
## F-statistic: 10.79 on 1 and 10 DF, p-value: 0.008211
summary(lm(frac_bacprod ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(otu_alphadiv,
## measure == "Simpsons_Evenness" & fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.389 -12.029 -3.306 4.668 39.946
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.85 23.52 0.674 0.516
## mean 114.96 321.02 0.358 0.728
##
## Residual standard error: 18.27 on 10 degrees of freedom
## Multiple R-squared: 0.01266, Adjusted R-squared: -0.08607
## F-statistic: 0.1282 on 1 and 10 DF, p-value: 0.7277
#################################### Per-Cell Production
# Richness
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Richness")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Richness"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8668 -0.2124 0.1045 0.2133 0.6473
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.4591277 0.2212633 -38.231 < 2e-16 ***
## mean 0.0028988 0.0004641 6.246 3.4e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3804 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6501, Adjusted R-squared: 0.6334
## F-statistic: 39.02 on 1 and 21 DF, p-value: 3.395e-06
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Richness" & fraction == "Particle")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Richness" & fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55347 -0.21545 -0.01066 0.12536 0.58830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.0617648 0.3717671 -21.685 4.44e-09 ***
## mean 0.0023794 0.0006348 3.748 0.00457 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3506 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6095, Adjusted R-squared: 0.5662
## F-statistic: 14.05 on 1 and 9 DF, p-value: 0.004567
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Richness" & fraction == "Free")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Richness" & fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71719 -0.13833 0.07155 0.23581 0.56949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.135758 0.471253 -17.264 8.99e-09 ***
## mean 0.001657 0.001353 1.225 0.249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3859 on 10 degrees of freedom
## Multiple R-squared: 0.1304, Adjusted R-squared: 0.04344
## F-statistic: 1.499 on 1 and 10 DF, p-value: 0.2488
# Shannon
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Shannon_Entropy"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.03514 -0.15042 -0.03394 0.26568 0.82794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.9036 0.7534 -14.472 2.14e-12 ***
## mean 0.8805 0.1763 4.993 6.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4348 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5428, Adjusted R-squared: 0.521
## F-statistic: 24.93 on 1 and 21 DF, p-value: 6.09e-05
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Particle")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Shannon_Entropy" & fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36686 -0.23571 -0.01330 0.03961 0.70312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.8897 0.8820 -11.213 1.37e-06 ***
## mean 0.6993 0.1935 3.614 0.00562 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3584 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5921, Adjusted R-squared: 0.5468
## F-statistic: 13.06 on 1 and 9 DF, p-value: 0.00562
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Shannon_Entropy" & fraction == "Free")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Shannon_Entropy" & fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72444 -0.17785 0.08114 0.13946 0.67366
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.8666 1.6332 -5.429 0.000289 ***
## mean 0.3243 0.4091 0.793 0.446298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4014 on 10 degrees of freedom
## Multiple R-squared: 0.05914, Adjusted R-squared: -0.03495
## F-statistic: 0.6285 on 1 and 10 DF, p-value: 0.4463
# Inverse Simpson
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Inverse_Simpson"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97302 -0.28199 -0.05285 0.32003 0.99088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.86039 0.18153 -43.301 < 2e-16 ***
## mean 0.02355 0.00525 4.485 0.000204 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4596 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4893, Adjusted R-squared: 0.4649
## F-statistic: 20.12 on 1 and 21 DF, p-value: 0.0002037
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Inverse_Simpson" & fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28651 -0.18384 -0.11125 0.07337 0.56444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.360961 0.159490 -46.153 5.27e-12 ***
## mean 0.018087 0.003759 4.812 0.000958 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2969 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7201, Adjusted R-squared: 0.689
## F-statistic: 23.15 on 1 and 9 DF, p-value: 0.0009581
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Free")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Inverse_Simpson" & fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68269 -0.11512 0.01325 0.17742 0.63175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.03513 0.35685 -22.517 6.72e-10 ***
## mean 0.01910 0.01409 1.355 0.205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3803 on 10 degrees of freedom
## Multiple R-squared: 0.1551, Adjusted R-squared: 0.07064
## F-statistic: 1.836 on 1 and 10 DF, p-value: 0.2052
# Simpson's Evenness
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Simpsons_Evenness"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.06874 -0.41920 0.04553 0.34511 1.56565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4496 0.4116 -18.10 2.74e-14 ***
## mean 4.3470 6.0344 0.72 0.479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6353 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02411, Adjusted R-squared: -0.02236
## F-statistic: 0.5189 on 1 and 21 DF, p-value: 0.4792
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Particle")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Simpsons_Evenness" & fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4726 -0.2230 -0.1106 0.1248 0.6887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5941 0.2885 -26.324 7.96e-10 ***
## mean 15.1887 4.6341 3.278 0.00957 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3788 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5441, Adjusted R-squared: 0.4935
## F-statistic: 10.74 on 1 and 9 DF, p-value: 0.009566
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv, measure == "Simpsons_Evenness" & fraction == "Free")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(otu_alphadiv,
## measure == "Simpsons_Evenness" & fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63112 -0.24216 0.01388 0.14672 0.77426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9274 0.5202 -15.240 3e-08 ***
## mean 4.9361 7.1008 0.695 0.503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4041 on 10 degrees of freedom
## Multiple R-squared: 0.04609, Adjusted R-squared: -0.0493
## F-statistic: 0.4832 on 1 and 10 DF, p-value: 0.5028
prodiv_plot1 <- ggplot(filter(otu_alphadiv, measure %in% c("Shannon_Entropy", "Simpsons_Evenness")),
aes(x = mean, y = frac_bacprod)) +
ylab("Heterotrophic Production \n(ug C/L/hr)") +
xlab("Mean Diversity Value") +
facet_wrap(~measure, scale = "free_x", ncol = 4) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3, shape = 21, aes(fill = fraction), color = "black") +
scale_fill_manual(values = fraction_colors) +
scale_color_manual(values = fraction_colors) +
theme(legend.position = "none", axis.title.x = element_blank()) +
geom_smooth(method = "lm",
data = filter(otu_alphadiv, measure %in% c("Shannon_Entropy", "Simpsons_Evenness") &
fraction == "Particle"),
color = "#FF6600", fill = "#FF6600", alpha = 0.3)
prodiv_plot2 <- ggplot(filter(otu_alphadiv, measure %in% c("Shannon_Entropy", "Simpsons_Evenness")),
aes(x = mean, y = log10(fracprod_per_cell_noinf))) +
ylab("log10(production/cell) \n (ug C/cell/hr)") +
xlab("Mean Diversity Value") +
facet_wrap(~measure, scale = "free_x", ncol = 4) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3, shape = 21, aes(fill = fraction), color = "black") +
scale_fill_manual(values = fraction_colors) +
scale_color_manual(values = fraction_colors) +
theme(legend.position = "none", axis.title.x = element_blank()) +
geom_smooth(method = "lm",
data = filter(otu_alphadiv, measure %in% c("Shannon_Entropy", "Simpsons_Evenness") &
fraction == "Particle"),
color = "#FF6600", fill = "#FF6600", alpha = 0.3)
plot_grid(prodiv_plot1, prodiv_plot2,
labels = c("A", "B"),
nrow = 2, ncol = 1)## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_errorbarh).
## Warning: Removed 2 rows containing missing values (geom_point).
lm_simpseven_bulkprod <- lm(frac_bacprod ~ mean,
data = filter(otu_alphadiv, measure == "Simpsons_Evenness"))
ggplot(filter(otu_alphadiv, measure == "Simpsons_Evenness"),
aes(x = mean, y = frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
ylab("Heterotrophic Production \n(ug C/L/hr)") +
xlab("Simpson's Evenness") +
geom_point(size = 3, shape = 21, aes(fill = fraction)) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_fill_manual(values = fraction_colors) +
scale_color_manual(values = fraction_colors) +
theme(legend.position = c(0.15, 0.9), legend.title = element_blank()) +
annotate("text", x = 0.115, y=55, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_simpseven_bulkprod)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_simpseven_bulkprod)$coefficients[,4][2]), digits = 3))) ######################################################### OBSERVED RICHNESS
richness <- filter(otu_alphadiv, measure == "Richness")
rich_fraction_wilcox <- wilcox.test(mean ~ fraction, data = richness)
rich_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 129, p-value = 0.0004955
## alternative hypothesis: true location shift is not equal to 0
filter(richness) %>%
group_by(fraction) %>%
summarize(mean(mean))## # A tibble: 2 × 2
## fraction `mean(mean)`
## <fctr> <dbl>
## 1 Particle 556.7992
## 2 Free 338.4242
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(920, 930, 930, 920)) # WholePart vs WholeFree
rich_distribution_plot <-
ggplot(richness, aes(y = mean, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
xlab("Observed Richness") +
xlab("Fraction") +
##### Particle vs free per-cell production
geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=790, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(rich_fraction_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Richness
summary(lm(frac_bacprod ~ mean, data = richness)) ##
## Call:
## lm(formula = frac_bacprod ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.057 -12.017 -5.654 9.256 46.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.501168 9.039579 2.047 0.0528 .
## mean -0.003335 0.018911 -0.176 0.8616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.54 on 22 degrees of freedom
## Multiple R-squared: 0.001412, Adjusted R-squared: -0.04398
## F-statistic: 0.0311 on 1 and 22 DF, p-value: 0.8616
lm_prod_vs_rich <- lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_prod_vs_rich)##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1271 -2.8865 -0.4649 3.7537 9.8401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.269531 5.717450 -1.971 0.07701 .
## mean 0.038125 0.009868 3.863 0.00314 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.476 on 10 degrees of freedom
## Multiple R-squared: 0.5988, Adjusted R-squared: 0.5587
## F-statistic: 14.93 on 1 and 10 DF, p-value: 0.003143
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.094 -11.112 -2.755 8.611 33.308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.40793 21.62006 0.250 0.808
## mean 0.05511 0.06207 0.888 0.395
##
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared: 0.07306, Adjusted R-squared: -0.01963
## F-statistic: 0.7882 on 1 and 10 DF, p-value: 0.3955
# Plot
prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
annotate("text", x = 790, y=45, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_rich)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_rich)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Richness vs per cell production
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness)) ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8668 -0.2124 0.1045 0.2133 0.6473
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.4591277 0.2212633 -38.231 < 2e-16 ***
## mean 0.0028988 0.0004641 6.246 3.4e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3804 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6501, Adjusted R-squared: 0.6334
## F-statistic: 39.02 on 1 and 21 DF, p-value: 3.395e-06
lm_percell_prod_vs_rich <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_percell_prod_vs_rich)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55347 -0.21545 -0.01066 0.12536 0.58830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.0617648 0.3717671 -21.685 4.44e-09 ***
## mean 0.0023794 0.0006348 3.748 0.00457 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3506 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6095, Adjusted R-squared: 0.5662
## F-statistic: 14.05 on 1 and 9 DF, p-value: 0.004567
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Free")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71719 -0.13833 0.07155 0.23581 0.56949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.135758 0.471253 -17.264 8.99e-09 ***
## mean 0.001657 0.001353 1.225 0.249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3859 on 10 degrees of freedom
## Multiple R-squared: 0.1304, Adjusted R-squared: 0.04344
## F-statistic: 1.499 on 1 and 10 DF, p-value: 0.2488
# Plot
percell_prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 790, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_rich)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_rich)$coefficients[,4][2]), digits = 3))) +
#annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
rich_plots <- plot_grid(rich_distribution_plot, prod_vs_rich_plot, percell_prod_vs_rich_plot,
labels = c("A", "C", "E"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
rich_plots######################################################### INVERSE SIMPSON
invsimps <- filter(otu_alphadiv, measure == "Inverse_Simpson")
invsimps_fraction_wilcox <- wilcox.test(mean ~ fraction, data = invsimps)
invsimps_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(invsimps) %>%
group_by(fraction) %>%
summarize(mean(mean))## # A tibble: 2 × 2
## fraction `mean(mean)`
## <fctr> <dbl>
## 1 Particle 35.47659
## 2 Free 24.09219
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree
invsimps_distribution_plot <-
ggplot(invsimps, aes(y = mean, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
ylab("Inverse Simpson") +
xlab("Fraction") +
##### Particle vs free per-cell production
geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=80, fontface = "bold", size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# INVERSE SIMPSON VS BULK PRODUCTION
summary(lm(frac_bacprod ~ mean, data = invsimps)) ##
## Call:
## lm(formula = frac_bacprod ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.269 -10.298 -4.916 5.866 46.452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.1577 6.0225 2.019 0.0559 .
## mean 0.1629 0.1731 0.941 0.3570
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.25 on 22 degrees of freedom
## Multiple R-squared: 0.03868, Adjusted R-squared: -0.005019
## F-statistic: 0.8851 on 1 and 22 DF, p-value: 0.357
lm_prod_vs_invsimps <- lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_prod_vs_invsimps)##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5589 -2.1093 -0.1969 0.8752 7.8282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.45199 2.43866 -0.185 0.85666
## mean 0.29344 0.05781 5.076 0.00048 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.571 on 10 degrees of freedom
## Multiple R-squared: 0.7204, Adjusted R-squared: 0.6925
## F-statistic: 25.77 on 1 and 10 DF, p-value: 0.0004804
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.765 -9.356 -4.445 6.116 36.017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.0985 16.7052 0.664 0.521
## mean 0.5379 0.6598 0.815 0.434
##
## Residual standard error: 17.8 on 10 degrees of freedom
## Multiple R-squared: 0.06233, Adjusted R-squared: -0.03143
## F-statistic: 0.6648 on 1 and 10 DF, p-value: 0.4339
# Plot
prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
annotate("text", x = 70, y=45, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_invsimps)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_invsimps)$coefficients[,4][2]), digits = 4))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Inverse Simpson vs per-cell production
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)) ##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97302 -0.28199 -0.05285 0.32003 0.99088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.86039 0.18153 -43.301 < 2e-16 ***
## mean 0.02355 0.00525 4.485 0.000204 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4596 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4893, Adjusted R-squared: 0.4649
## F-statistic: 20.12 on 1 and 21 DF, p-value: 0.0002037
lm_percell_prod_vs_invsimps <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_percell_prod_vs_invsimps)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28651 -0.18384 -0.11125 0.07337 0.56444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.360961 0.159490 -46.153 5.27e-12 ***
## mean 0.018087 0.003759 4.812 0.000958 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2969 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7201, Adjusted R-squared: 0.689
## F-statistic: 23.15 on 1 and 9 DF, p-value: 0.0009581
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Free")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68269 -0.11512 0.01325 0.17742 0.63175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.03513 0.35685 -22.517 6.72e-10 ***
## mean 0.01910 0.01409 1.355 0.205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3803 on 10 degrees of freedom
## Multiple R-squared: 0.1551, Adjusted R-squared: 0.07064
## F-statistic: 1.836 on 1 and 10 DF, p-value: 0.2052
# Plot
percell_prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_point(size = 3.5, shape = 21, color = "black", aes(fill = fraction)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 70, y=-7.5, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_invsimps)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_invsimps)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
invsimps_plots <- plot_grid(invsimps_distribution_plot, prod_vs_invsimps_plot, percell_prod_vs_invsimps_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
invsimps_plotsplot_grid(rich_plots, invsimps_plots,
ncol = 2, nrow = 1,
align = "h")invsimps_plots_noyaxis <-
plot_grid(invsimps_distribution_plot + theme(axis.title.y = element_blank(), axis.text.y = element_blank()),
prod_vs_invsimps_plot + theme(axis.title.y = element_blank()),
percell_prod_vs_invsimps_plot + theme(axis.title.y = element_blank()),
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
plot_grid(rich_plots, invsimps_plots_noyaxis,
ncol = 2, nrow = 1, rel_widths = c(1, 0.825),
align = "h")# Read in the tree
RAREFIED_rm2_fasttree <- read.tree(file = "data/PhyloTree/newick_tree_rm2_rmN.tre")
# Load in data that has doubletons removed
load("data/PhyloTree/surface_PAFL_otu_pruned_RAREFIED_rm2.RData")
surface_PAFL_otu_pruned_RAREFIED_rm2## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 1891 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 70 sample variables ]
## tax_table() Taxonomy Table: [ 1891 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 1891 tips and 1889 internal nodes ]
# Create the OTU table for picante
surface_PAFL_RAREFIED_rm2_otu <- matrix(otu_table(surface_PAFL_otu_pruned_RAREFIED_rm2), nrow = nrow(otu_table(surface_PAFL_otu_pruned_RAREFIED_rm2)))
rownames(surface_PAFL_RAREFIED_rm2_otu) <- sample_names(surface_PAFL_otu_pruned_RAREFIED_rm2)
colnames(surface_PAFL_RAREFIED_rm2_otu) <- taxa_names(surface_PAFL_otu_pruned_RAREFIED_rm2)
## Calculate input for SES_MPD
# Convert the abundance data to standardized abundanced vegan function `decostand' , NOTE: method = "total"
otu_decostand_total <- decostand(surface_PAFL_RAREFIED_rm2_otu, method = "total")
# check total abundance in each sample
apply(otu_decostand_total, 1, sum)## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
# check for mismatches/missing species between community data and phylo tree
RAREFIED_rm2_matches <- match.phylo.comm(RAREFIED_rm2_fasttree, otu_decostand_total)
# the resulting object is a list with $phy and $comm elements. replace our
# original data with the sorted/matched data
phy_RAREFIED_rm2 <- RAREFIED_rm2_matches$phy
comm_RAREFIED_rm2 <- RAREFIED_rm2_matches$comm
# Calculate the phylogenetic distances
phy_dist_RAREFIED_rm2 <- cophenetic(phy_RAREFIED_rm2)
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_RAREFIED_rm2 <- ses.mpd(comm_RAREFIED_rm2, phy_dist_RAREFIED_rm2, null.model = "independentswap",
abundance.weighted = FALSE, runs = 999)
WEIGHTED_sesMPD_indepswap_RAREFIED_rm2 <- ses.mpd(comm_RAREFIED_rm2, phy_dist_RAREFIED_rm2, null.model = "independentswap",
abundance.weighted = TRUE, runs = 999)
# Gather div info
rich_df <- filter(otu_alphadiv, measure == "Richness") %>%
dplyr::select(norep_filter_name, mean, sd, frac_bacprod, SD_frac_bacprod, fracprod_per_cell_noinf)
invsimps_df <- filter(otu_alphadiv, measure == "Inverse_Simpson") %>%
dplyr::select(norep_filter_name, mean, sd, frac_bacprod, SD_frac_bacprod, fracprod_per_cell_noinf)
# Prepare to be merged with each other
unweighted_df <- unweighted_sesMPD_indepswap_RAREFIED_rm2 %>%
rownames_to_column("names") %>%
mutate(phylo_measure = "Unweighted_SESMPD") %>%
make_metadata_norep() %>%
mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree")) %>%
rename(norep_filter_name = names) %>%
left_join(rich_df, by = "norep_filter_name") %>%
mutate(lakesite = factor(lakesite, levels = c("MOT", "MDP","MBR", "MIN")))## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining factor and character vector, coercing into character vector
weighted_df <- WEIGHTED_sesMPD_indepswap_RAREFIED_rm2 %>%
rownames_to_column("names") %>%
mutate(phylo_measure = "Weighted_SESMPD") %>%
make_metadata_norep() %>%
mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree")) %>%
rename(norep_filter_name = names) %>%
left_join(invsimps_df, by = "norep_filter_name") %>%
mutate(lakesite = factor(lakesite, levels = c("MOT", "MDP","MBR", "MIN")))## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining factor and character vector, coercing into character vector
# Test by station
part_unweighted_df <- filter(unweighted_df, fraction == "Particle")
kruskal.test(mpd.obs.z ~ lakesite, data = filter(unweighted_df, fraction == "Particle"))##
## Kruskal-Wallis rank sum test
##
## data: mpd.obs.z by lakesite
## Kruskal-Wallis chi-squared = 5.8718, df = 3, p-value = 0.118
unweighted_df %>%
filter(fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mpd.obs.z))## # A tibble: 4 × 2
## lakesite `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 MOT 0.7949746
## 2 MDP -0.9214585
## 3 MBR -0.7819050
## 4 MIN -1.0800320
# Lakesite
plot_part_unweight_lakesite <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = lakesite)) +
ggtitle("Particle-Associated Samples Only") +
scale_fill_manual(values = lakesite_colors) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
geom_jitter(size = 3, shape = 21, aes(fill = lakesite), width = 0.2) +
geom_boxplot(aes(fill = lakesite), alpha = 0.5) +
scale_x_discrete(breaks=c("MOT", "MDP", "MBR", "MIN"),
labels=c("Outlet", "Deep", "Bear", "River")) +
theme(axis.title.x = element_blank(),
legend.position = c(0.85, 0.9))
# Season
plot_part_unweight_season <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = season)) +
ggtitle("Particle-Associated Samples Only") +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = season_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = season), width = 0.2) +
geom_boxplot(aes(fill = season), alpha = 0.5) +
theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
legend.position = c(0.9, 0.9))
plot_grid(plot_part_unweight_lakesite, plot_part_unweight_season,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")######################################################### SES MPD DISTRIBUTION: UNWEIGHTED
unweighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = unweighted_df)
unweighted_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 30, p-value = 0.01449
## alternative hypothesis: true location shift is not equal to 0
unweighted_df %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))## # A tibble: 2 × 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.4971052
## 2 Free 0.4374514
# Make a data frame to draw significance line in boxplot (visually calculated)
dat4 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
unweight_distribution_plot <-
ggplot(unweighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat4, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.35, fontface = "bold", size = 4, color = "#424645",
label= paste("***\np =", round(unweighted_fraction_wilcox$p.value, digits = 2))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
######################################################### SES MPD DISTRIBUTION: WEIGHTED
weighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = weighted_df)
weighted_fraction_wilcox##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(weighted_df) %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))## # A tibble: 2 × 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.3607944
## 2 Free -0.3854885
# Make a data frame to draw significance line in boxplot (visually calculated)
dat5 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
weight_distribution_plot <-
ggplot(weighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat5, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.55, fontface = "bold", size = 3.5, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(mean ~ mpd.obs.z, data = unweighted_df))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -195.76 -96.00 -14.21 80.09 295.21
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 443.89 28.29 15.690 1.98e-13 ***
## mpd.obs.z -124.90 34.37 -3.634 0.00147 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 138.5 on 22 degrees of freedom
## Multiple R-squared: 0.3751, Adjusted R-squared: 0.3467
## F-statistic: 13.21 on 1 and 22 DF, p-value: 0.001466
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -191.77 -112.16 -41.89 55.88 278.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 515.16 51.16 10.069 1.49e-06 ***
## mpd.obs.z -83.77 49.91 -1.678 0.124
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 155 on 10 degrees of freedom
## Multiple R-squared: 0.2198, Adjusted R-squared: 0.1417
## F-statistic: 2.817 on 1 and 10 DF, p-value: 0.1242
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -100.15 -66.22 -18.70 43.65 172.92
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 337.091 42.747 7.886 1.34e-05 ***
## mpd.obs.z 3.049 77.511 0.039 0.969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90.18 on 10 degrees of freedom
## Multiple R-squared: 0.0001547, Adjusted R-squared: -0.09983
## F-statistic: 0.001547 on 1 and 10 DF, p-value: 0.9694
unweight_rich_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = mean, x = mpd.obs.z)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21, aes(fill = fraction)) + ylab("Observed Richness") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=750, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between inverse simpson and Weighted Mean Pairwise distance?
#summary(lm(mean ~ mpd.obs.z, data = weighted_df)) # NS
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.42 -19.15 0.40 12.95 40.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.58 12.61 1.870 0.0911 .
## mpd.obs.z -32.98 29.43 -1.121 0.2886
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.57 on 10 degrees of freedom
## Multiple R-squared: 0.1116, Adjusted R-squared: 0.02276
## F-statistic: 1.256 on 1 and 10 DF, p-value: 0.2886
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0597 -4.3465 -0.8155 3.8832 18.4294
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.187 4.720 4.701 0.00084 ***
## mpd.obs.z -4.942 10.486 -0.471 0.64753
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.441 on 10 degrees of freedom
## Multiple R-squared: 0.02173, Adjusted R-squared: -0.0761
## F-statistic: 0.2221 on 1 and 10 DF, p-value: 0.6475
weight_invsimps_vs_mpd_plot <-
ggplot(weighted_df, aes(y = mean, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) + ylab("Inverse Simpson") +
#xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between Production and Unweighted Mean Pairwise distance?
#summary(lm(frac_bacprod ~ mpd.obs.z, data = unweighted_df)) # NS
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.145 -4.312 -1.824 3.402 19.528
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.213 2.617 3.138 0.0105 *
## mpd.obs.z -3.511 2.553 -1.375 0.1991
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.928 on 10 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.07494
## F-statistic: 1.891 on 1 and 10 DF, p-value: 0.1991
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.997 -14.937 2.190 8.751 37.007
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.194 8.397 2.167 0.0555 .
## mpd.obs.z 13.407 15.225 0.881 0.3992
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.71 on 10 degrees of freedom
## Multiple R-squared: 0.07196, Adjusted R-squared: -0.02084
## F-statistic: 0.7754 on 1 and 10 DF, p-value: 0.3992
unweight_prod_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between Production and Weighted Mean Pairwise distance?
summary(lm(frac_bacprod ~ mpd.obs.z, data = weighted_df))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.030 -10.236 -2.842 5.937 38.409
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.008 5.664 1.590 0.126
## mpd.obs.z -21.439 12.889 -1.663 0.110
##
## Residual standard error: 14.66 on 22 degrees of freedom
## Multiple R-squared: 0.1117, Adjusted R-squared: 0.07133
## F-statistic: 2.767 on 1 and 22 DF, p-value: 0.1104
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.240 -4.575 -1.717 4.503 15.352
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.391 4.126 1.064 0.312
## mpd.obs.z -15.432 9.627 -1.603 0.140
##
## Residual standard error: 7.711 on 10 degrees of freedom
## Multiple R-squared: 0.2044, Adjusted R-squared: 0.1249
## F-statistic: 2.569 on 1 and 10 DF, p-value: 0.14
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.790 -12.275 -3.481 7.360 30.573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.697 9.683 1.518 0.160
## mpd.obs.z -24.285 21.512 -1.129 0.285
##
## Residual standard error: 17.32 on 10 degrees of freedom
## Multiple R-squared: 0.113, Adjusted R-squared: 0.02434
## F-statistic: 1.274 on 1 and 10 DF, p-value: 0.2853
weight_prod_vs_mpd_plot <-
ggplot(weighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-CELL PRODUCTION and Unweighted Mean Pairwise distance?
unweight_vs_percell <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
summary(unweight_vs_percell)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72262 -0.28981 0.00389 0.12961 1.31931
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.1988 0.0999 -72.063 < 2e-16 ***
## mpd.obs.z -0.4973 0.1205 -4.128 0.000479 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4778 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4479, Adjusted R-squared: 0.4216
## F-statistic: 17.04 on 1 and 21 DF, p-value: 0.0004788
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.37955 -0.24192 -0.16507 0.04426 1.18158
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.9247 0.1711 -40.472 1.71e-11 ***
## mpd.obs.z -0.3299 0.1627 -2.027 0.0732 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4649 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3135, Adjusted R-squared: 0.2372
## F-statistic: 4.11 on 1 and 9 DF, p-value: 0.07324
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6398 -0.2978 0.0770 0.1760 0.7651
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.55616 0.19601 -38.550 3.29e-12 ***
## mpd.obs.z -0.04317 0.35540 -0.121 0.906
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4135 on 10 degrees of freedom
## Multiple R-squared: 0.001473, Adjusted R-squared: -0.09838
## F-statistic: 0.01475 on 1 and 10 DF, p-value: 0.9057
unweight_percell_vs_mpd_plot <-
ggplot(unweighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21, aes(fill = fraction)) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color="#424645", fill = "#424645", alpha = 0.3) +
#stat_smooth(method="lm", se=TRUE, formula= y ~ poly(x, 2, raw=TRUE), color="#424645", fill = "#424645", alpha = 0.3) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=-6, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(unweight_vs_percell)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(unweight_vs_percell)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# Is there a relationship between PER-CELL PRODUCTION and Weighted Mean Pairwise distance?
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.94765 -0.40616 -0.03619 0.31885 1.39101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5352 0.2442 -30.860 <2e-16 ***
## mpd.obs.z -0.9519 0.5446 -1.748 0.0951 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6009 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.127, Adjusted R-squared: 0.08544
## F-statistic: 3.055 on 1 and 21 DF, p-value: 0.09508
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65329 -0.32632 -0.03486 0.22224 0.95307
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.0845 0.3015 -23.496 2.18e-09 ***
## mpd.obs.z -0.9337 0.6753 -1.383 0.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5096 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1752, Adjusted R-squared: 0.08357
## F-statistic: 1.912 on 1 and 9 DF, p-value: 0.2001
lm_percell_free_mpd <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free"))
summary(lm_percell_free_mpd)##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54189 -0.16159 -0.00204 0.20070 0.44618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9519 0.1848 -43.019 1.11e-12 ***
## mpd.obs.z -0.9777 0.4107 -2.381 0.0386 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3306 on 10 degrees of freedom
## Multiple R-squared: 0.3617, Adjusted R-squared: 0.2979
## F-statistic: 5.667 on 1 and 10 DF, p-value: 0.03857
weight_percell_vs_mpd_plot <-
ggplot(weighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z, fill = fraction)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", data = filter(weighted_df, fraction == "Free"), color = "skyblue", fill = "skyblue", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
annotate("text", x = 0.3, y=-7.9, color = "skyblue", fontface = "bold",
label = paste("R2 =", round(summary(lm_percell_free_mpd)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_free_mpd)$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_grid(unweight_distribution_plot, unweight_rich_vs_mpd_plot, unweight_prod_vs_mpd_plot, unweight_percell_vs_mpd_plot,
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 0.8, 0.8, 1.2),
align = "v")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
plot_grid(weight_distribution_plot, weight_invsimps_vs_mpd_plot, weight_prod_vs_mpd_plot, weight_percell_vs_mpd_plot,
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 0.8, 0.8, 1.2),
align = "v")## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for <bc>
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font metrics unknown for Unicode character U+03bc
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <ce>
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): conversion failure on ' (μgC/cell/hr)' in 'mbcsToSbcs': dot substituted for <bc>
######################################################### Subset only richness data
### These are the richness values for the fake samples
#rich_stats <- filter(otu_alphadiv, measure == "Richness") %>%
# dplyr::select(1:2) %>%
# rename(mean_richness = mean) %>%
# mutate(sample = paste("Sample_", seq(1:nrow(filter(otu_alphadiv, measure == "Richness"))), sep = ""),
# mean_richness = matround(mean_richness))
## Pick OTUs to match these richness values
# List the otus from ALL samples
# all_otus <- taxa_names(surface_PAFL_otu_pruned_rm2)
# Obtain the OTU table from the phyloseq object
# otutab <- otu_table(surface_PAFL_otu_pruned_rm2)
# Make all the counts to be 0
# otutab_newvals <- apply(otutab, c(1, 2), function(x) 0)
# Stop if things are wrong
# stopifnot(colnames(otutab_newvals) == all_otus) # Do the OTU names match?
# stopifnot(rownames(otutab_newvals) == rich_stats$norep_filter_name) # Do the sample names match?
# Make it reproducible!
#set.seed(777)
#for (row in 1:nrow(rich_stats)) {
# Pick the richness value
# rich_val <- rich_stats[row, 2]
# Sample the OTUs to represent that richness value
# col_index <- sample(ncol(otutab_newvals), rich_val, replace = FALSE, prob = NULL)
# make all other columns 0
# otutab_newvals[row, col_index] <- 1
#}
## Calculate the tree for those randomized samples
# create a new phyloseq object
#random_physeq_presab_raw <- phyloseq(otu_table(otutab_newvals, taxa_are_rows = FALSE),
# tax_table(surface_PAFL_otu_pruned_rm2), sample_data(surface_PAFL_otu_pruned_rm2))
#random_physeq_presab_raw
# Remove taxa that are 0!
#random_physeq_presab_pruned <- prune_taxa(taxa_sums(random_physeq_presab_raw) > 0, random_physeq_presab_raw)
#random_physeq_presab_pruned
# Calculate tree
# Obtain the OTU names that were retained in the dataset
#tax <- data.frame(tax_table(random_physeq_presab_pruned))
#vector_of_otus <- as.vector(tax$OTU)
# Write out the file for processing/fasttree
# write(vector_of_otus, file = "data/PhyloTree/randomized/random_physeq_presab_pruned_OTUnames.txt", ncolumns = 1, append = FALSE, sep = "\n")# Read in the tree
randomized_tree <- read.tree(file = "data/PhyloTree/randomized/newick_tree_randomized_rmN.tre")
#random_physeq_presab_pruned_tree <- merge_phyloseq(random_physeq_presab_pruned, randomized_tree)
#random_physeq_presab_pruned_tree
#save(list="random_physeq_presab_pruned_tree", file=paste0("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData"))
load("data/PhyloTree/randomized/random_physeq_presab_pruned_tree.RData")
random_physeq_presab_pruned_tree## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2911 taxa and 24 samples ]
## sample_data() Sample Data: [ 24 samples by 44 sample variables ]
## tax_table() Taxonomy Table: [ 2911 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2911 tips and 2909 internal nodes ]
# First force the OTU
randomized_otu <- matrix(otu_table(random_physeq_presab_pruned_tree), nrow = nrow(otu_table(random_physeq_presab_pruned_tree)))
rownames(randomized_otu) <- sample_names(random_physeq_presab_pruned_tree)
colnames(randomized_otu) <- taxa_names(random_physeq_presab_pruned_tree)
## Calculate input for SES_MPD
# Convert the presence/absence data to standardized abundanced vegan function `decostand' , NOTE: method = "pa"
otu_decostand <- decostand(randomized_otu, method = "pa")
# check total abundance in each sample
apply(otu_decostand, 1, sum)## MBREJ515 MBREJ715 MBREJ915 MBREK515 MBREK715 MBREK915 MDPEJ515 MDPEJ715 MDPEJ915 MDPEK515 MDPEK715 MDPEK915 MINEJ515 MINEJ715 MINEJ915 MINEK515 MINEK715 MINEK915 MOTEJ515 MOTEJ715 MOTEJ915 MOTEK515 MOTEK715 MOTEK915
## 906 574 434 268 256 358 493 447 476 276 284 381 840 632 586 452 383 511 505 343 444 274 238 381
# check for mismatches/missing species between community data and phylo tree
randomized_matches <- match.phylo.comm(randomized_tree, otu_decostand)
# the resulting object is a list with $phy and $comm elements. replace our
# original data with the sorted/matched data
phy_randomized_rm2 <- randomized_matches$phy
comm_randomized_rm2 <- randomized_matches$comm
# Calculate the phylogenetic distances
phy_dist_randomized_rm2 <- cophenetic(phy_randomized_rm2)
## Calculate SES_MPD
###################################### INDEPENDENT SWAP ############################################
# calculate standardized effect size mean pairwise distance (ses.mpd)
unweighted_sesMPD_indepswap_randomized <- ses.mpd(comm_randomized_rm2, phy_dist_randomized_rm2, null.model = "independentswap",
abundance.weighted = FALSE, runs = 999)
df <- unweighted_sesMPD_indepswap_randomized
df$names <- row.names(df)
df_2 <- make_metadata_norep(df) %>%
mutate(fraction = fct_recode(fraction, Particle = "WholePart", Free = "WholeFree"))
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(ntaxa ~ mpd.obs.z, data = df_2))##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = df_2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -212.74 -129.44 -12.54 53.58 468.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 448.27 35.48 12.633 1.47e-11 ***
## mpd.obs.z 56.32 94.24 0.598 0.556
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 173.7 on 22 degrees of freedom
## Multiple R-squared: 0.01598, Adjusted R-squared: -0.02875
## F-statistic: 0.3572 on 1 and 22 DF, p-value: 0.5562
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Particle")))##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -203.87 -109.15 -44.96 44.29 341.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 557.62 50.67 11.006 6.56e-07 ***
## mpd.obs.z -33.22 140.62 -0.236 0.818
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 175 on 10 degrees of freedom
## Multiple R-squared: 0.00555, Adjusted R-squared: -0.0939
## F-statistic: 0.05581 on 1 and 10 DF, p-value: 0.818
summary(lm(ntaxa ~ mpd.obs.z, data = filter(df_2, fraction == "Free")))##
## Call:
## lm(formula = ntaxa ~ mpd.obs.z, data = filter(df_2, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -107.76 -48.80 -17.85 56.29 159.66
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 342.48 24.62 13.913 7.19e-08 ***
## mpd.obs.z 74.88 62.79 1.193 0.261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 84.48 on 10 degrees of freedom
## Multiple R-squared: 0.1245, Adjusted R-squared: 0.03699
## F-statistic: 1.422 on 1 and 10 DF, p-value: 0.2605
ggplot(df_2, aes(y = ntaxa, x = mpd.obs.z, fill = fraction)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21) + ylab("Randomized Richness") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1,1)) +
theme(legend.title = element_blank(), legend.position = c(0.15, 0.9))